##Study 2 data set up.R
##Recoding of variables and other data preparation for Study 2 (TESS energy study)
##Harbridge-Yong and Paris LSQ
##R version 3.6.1 

##analysis of data with negative timers replaced with new Nov. data collection
##updates analysis to use indicator for OPPOSING PARTY
##uses manual coding of whether knowledge answers are correct (and recodes 77 as incorrect because it means "don't know")

##Point R at the folder holding the Study 2 .dta file before anything else runs
setwd("...")  ##SET WORKING DIRECTORY HERE

##foreign supplies read.dta() for Stata files
library(foreign)
##convert.factors = FALSE: do not turn Stata value labels into R factors
data <- read.dta(
  file = "Study_2_TESS027_HarbridgeYong_Paris_stata_12.dta",
  convert.factors = FALSE
)

######################################
##convert 98 to NA (the feeling thermometers use 998 instead)
library(dplyr)
##Work on a copy so the raw import in `data` stays untouched
data1 <- data

##Codes that stand for "no usable answer", listed per variable.
##Most items use 98; the feeling thermometers (Q6, Q7) use 998; PartyID7 and
##IDEO use -1; GENDER uses 0; STATE uses both 77 and 99.
##Q9, Q10 and Q11 (knowledge questions) are not recoded here: their open-ended
##answers were coded by hand as correct or incorrect and are stored as
##Q9_correct, Q10_correct, Q11_correct.
missing_codes <- list(
  Q2 = 98, Q3 = 98, Q4 = 98,
  Q5A = 98, Q5B = 98, Q5C = 98, Q5D = 98, Q5E = 98, Q5F = 98,
  Q6 = 998, Q7 = 998,
  Q8 = 98, Q8A = 98,
  PartyID7 = -1,
  RELIG = 98, ATTEND = 98,
  IDEO = -1,
  GENDER = 0,
  STATE = c(77, 99)
)

##Fail early if the import lacks any variable that is about to be recoded
stopifnot(all(names(missing_codes) %in% names(data1)))

##Blank out the listed codes. %in% is used rather than dplyr::na_if() with a
##quoted code because recent dplyr refuses to compare a numeric column against
##a character value; %in% matches the code whether the column is stored as
##numeric or as text, and leaves values that are already NA untouched.
for (var in names(missing_codes)) {
  is_missing_code <- data1[[var]] %in% missing_codes[[var]]
  data1[[var]][is_missing_code] <- NA
}

##Knowledge questions: build *_correct_update versions of the hand-coded
##Q9_correct / Q10_correct / Q11_correct flags in which a raw answer of 77 is
##scored 0 (incorrect); every other row keeps its hand-coded value, and rows
##whose raw answer is NA come out NA.
##Each pair of summaries shows the coding before and after that override.
q9_is_77 <- data1$Q9 == 77
data1$Q9_correct_update <- ifelse(q9_is_77, 0, data1$Q9_correct)
summary(as.factor(data1$Q9_correct))
summary(as.factor(data1$Q9_correct_update))

q10_is_77 <- data1$Q10 == 77
data1$Q10_correct_update <- ifelse(q10_is_77, 0, data1$Q10_correct)
summary(as.factor(data1$Q10_correct))
summary(as.factor(data1$Q10_correct_update))

q11_is_77 <- data1$Q11 == 77
data1$Q11_correct_update <- ifelse(q11_is_77, 0, data1$Q11_correct)
summary(as.factor(data1$Q11_correct))
summary(as.factor(data1$Q11_correct_update))

##Vignette timing: inspect the start and end stamps, then store their
##difference (end - start) as the time spent on the vignette
vig_start <- data1$VIGNETTE_START
vig_end <- data1$VIGNETTE_END

summary(as.numeric(vig_start)) ##0 to 1490
summary(vig_start)
summary(as.numeric(vig_end)) ##1 to 2069
summary(vig_end)

data1$duration_VIGNETTE <- vig_end - vig_start
##30 respondents have a negative duration,
##due to closing one browser and opening another
summary(data1$duration_VIGNETTE)


####################################
##        Data checks             ##
####################################
##Missing-data check: one summary per survey item; the trailing note on each
##line records the NA count seen when the script was written
summary(data1[["Q2"]])  ##3 NA
summary(data1[["Q3"]])  ##6 NA
summary(data1[["Q4"]])  ##16 NA
summary(data1[["Q5A"]]) ##19 NA
summary(data1[["Q5B"]]) ##24 NA
summary(data1[["Q5C"]]) ##22 NA
summary(data1[["Q5D"]]) ##23 NA
summary(data1[["Q5E"]]) ##28 NA
summary(data1[["Q5F"]]) ##26 NA
summary(data1[["Q6"]])  ##76 NA
summary(data1[["Q7"]])  ##66 NA
summary(data1[["Q8"]])  ##23 NA
summary(data1[["Q8A"]]) ##19 NA
##hand-coded knowledge items
summary(data1[["Q9_correct"]])  ##890 NA
summary(data1[["Q10_correct"]]) ##555 NA
summary(data1[["Q11_correct"]]) ##371 NA

##check sample demographics without sample weights
##(plain frequency tables; NA values are not shown by table() by default;
## the trailing notes record the counts seen when the script was written)
table(data1$RELIG)
table(data1$ATTEND)
table(data1$PartyID7) ##995 Dem, 370 Pure Ind, 736 Rep
table(data1$PARTYID2) ##PID asked Jan 2018 or later for 1935 (only 134 have older PID)
table(data1$IDEO)
table(data1$GENDER) ##1025 Male, 1076 Female
table(data1$AGE4) ##298 18-29, 616 30-44, 518 45-59, 669 60+
table(data1$RACETHNICITY) ##1396 white, 249 black, 33 other, 292 hispanic, 68 2+ hispanic, 63 Asian
table(data1$EDUC4) ##80 No HS dip, 355 HS, 946 some college (presumably; note originally read "College college"), 720 BA or above
table(data1$MARITAL)
table(data1$EMPLOY)
table(data1$INCOME)
table(data1$STATE)
table(data1$REGION4) ##309 NE, 585 MW, 718 S, 489 W

##check overall duration
summary(data1$duration)

####################################
##   Data set up for analysis     ##
####################################
##Recode DVs to 0-1 scale
##Helper: linear rescale so that `low` maps to 0 and `high` maps to 1
##(NA stays NA).
rescale_01 <- function(x, low, high) {
  (x - low) / (high - low)
}

##Q2: subtract 1, divide by 4
data1$cong <- rescale_01(data1$Q2, low = 1, high = 5)
summary(data1$cong)

##Q3 and Q4: subtract 1, divide by 6
data1$bill <- rescale_01(data1$Q3, low = 1, high = 7)
summary(data1$bill)

data1$fair <- rescale_01(data1$Q4, low = 1, high = 7)
summary(data1$fair)

##Q5A-Q5F (motive items): subtract 1, divide by 4
data1$motive.goodpolicy <- rescale_01(data1$Q5A, low = 1, high = 5)
summary(data1$motive.goodpolicy)

data1$motive.defeat <- rescale_01(data1$Q5B, low = 1, high = 5)
summary(data1$motive.defeat)

data1$motive.base <- rescale_01(data1$Q5C, low = 1, high = 5)
summary(data1$motive.base)

data1$motive.win <- rescale_01(data1$Q5D, low = 1, high = 5)
summary(data1$motive.win)

data1$motive.refuselisten <- rescale_01(data1$Q5E, low = 1, high = 5)
summary(data1$motive.refuselisten)

data1$motive.nofair <- rescale_01(data1$Q5F, low = 1, high = 5)
summary(data1$motive.nofair)

##Q6 and Q7 (feeling thermometers): divide by 100
data1$ft.dem <- rescale_01(data1$Q6, low = 0, high = 100)
summary(data1$ft.dem)

data1$ft.rep <- rescale_01(data1$Q7, low = 0, high = 100)
summary(data1$ft.rep)

##Q8 (news coverage): subtract 1, divide by 4
data1$news <- rescale_01(data1$Q8, low = 1, high = 5)
summary(data1$news)

##Respondent party, built from the 7-point PartyID7 item.
##Each vector starts as all-NA and is filled category by category; which()
##skips rows where PartyID7 is NA, so those rows stay NA.
pid7 <- data1$PartyID7

##Three-way party: 1-3 = Dem, 4 = Ind (pure independents only), 5-7 = Rep
pid3_labels <- rep(NA_character_, length(pid7))
pid3_labels[which(pid7 <= 3)] <- "Dem"
pid3_labels[which(pid7 == 4)] <- "Ind"
pid3_labels[which(pid7 >= 5)] <- "Rep"
data1$pid3 <- pid3_labels
table(data1$pid3)

##Party ID with leaners as independents: 1-2 = Dem, 3-5 = Ind, 6-7 = Rep
pid3_lean_labels <- rep(NA_character_, length(pid7))
pid3_lean_labels[which(pid7 <= 2)] <- "Dem"
pid3_lean_labels[which(pid7 == 3 | pid7 == 4 | pid7 == 5)] <- "Ind"
pid3_lean_labels[which(pid7 >= 6)] <- "Rep"
data1$pid3.indpluslean <- pid3_lean_labels
table(data1$pid3.indpluslean)

##strong partisans: the two end points of the 7-point scale (1 or 7);
##NA when PartyID7 is NA
at_scale_endpoint <- pid7 == 1 | pid7 == 7
data1$strong.partisan <- ifelse(at_scale_endpoint, 1, 0)
summary(as.factor(data1$strong.partisan))

##Treatment variables
##majority party in the vignette, taken from the assigned condition DOV_VIG:
##conditions 1-3 set maj.dem = 1, conditions 4-8 set maj.rep = 1
vignette <- data1$DOV_VIG
dem_majority <- vignette == 1 | vignette == 2 | vignette == 3
rep_majority <- vignette == 4 | vignette == 5 | vignette == 6 |
  vignette == 7 | vignette == 8

data1$maj.dem <- ifelse(dem_majority, 1, 0)
summary(data1$maj.dem)
data1$maj.rep <- ifelse(rep_majority, 1, 0)
summary(data1$maj.rep)
##cross-tab of the two indicators as a consistency check
xtabs(~data1$maj.dem + data1$maj.rep)

##majority party relative to the respondent's own party
##(independents are grouped with "opposing party" for now)
resp_dem <- data1$pid3 == "Dem"
resp_rep <- data1$pid3 == "Rep"
resp_ind <- data1$pid3 == "Ind"
vig_dem_majority <- data1$maj.dem == 1
vig_rep_majority <- data1$maj.rep == 1

##own party holds the majority
same_party <- (resp_dem & vig_dem_majority) | (resp_rep & vig_rep_majority)
data1$maj.own <- ifelse(same_party, 1, 0)
summary(data1$maj.own)

##other party holds the majority; note this is coded 1 for every independent
other_party <- (resp_dem & vig_rep_majority) |
  (resp_rep & vig_dem_majority) |
  resp_ind
data1$maj.other <- ifelse(other_party, 1, 0)
summary(data1$maj.other)
xtabs(~ data1$maj.own + data1$maj.other)

##Feeling thermometers re-expressed relative to the vignette:
##ft.maj is the rating of whichever party holds the majority,
##ft.min the rating of the other party (NA if neither indicator equals 1)
dem_holds_majority <- data1$maj.dem == 1
rep_holds_majority <- data1$maj.rep == 1

data1$ft.maj <- ifelse(
  dem_holds_majority, data1$ft.dem,
  ifelse(rep_holds_majority, data1$ft.rep, NA)
)
summary(data1$ft.maj)

data1$ft.min <- ifelse(
  dem_holds_majority, data1$ft.rep,
  ifelse(rep_holds_majority, data1$ft.dem, NA)
)
summary(data1$ft.min)

##treatments, all derived from the assigned vignette condition DOV_VIG
condition <- data1$DOV_VIG

##conditions 2, 5, 6
data1$treatment.bipart <- ifelse(
  (condition == 2) | (condition == 5) | (condition == 6), 1, 0
)
##conditions 3, 7, 8
data1$treatment.min <- ifelse(
  (condition == 3) | (condition == 7) | (condition == 8), 1, 0
)
##Just 1 for gas tax treatments (conditions 6, 8)
data1$gastax <- ifelse((condition == 6) | (condition == 8), 1, 0)
table(data1$gastax)
##1 for gas tax treatments and GOP Control (conditions 4, 6, 8)
data1$gastax.plus.control <- ifelse(
  (condition == 4) | (condition == 6) | (condition == 8), 1, 0
)
table(data1$gastax.plus.control)

##categorical variable for treatment; "Control" is whatever is in neither
##treatment indicator
in_bipart <- data1$treatment.bipart == 1
in_min <- data1$treatment.min == 1
data1$treatment.category <- ifelse(
  in_bipart, "Ignore Bipartisan",
  ifelse(in_min, "Ignore Minority", "Control")
)

##break apart Rep majority treatments by issue (one indicator per condition 5-8)
data1$treatment.bipart.green <- ifelse(condition == 5, 1, 0)
data1$treatment.bipart.gastax <- ifelse(condition == 6, 1, 0)
data1$treatment.min.green <- ifelse(condition == 7, 1, 0)
data1$treatment.min.gastax <- ifelse(condition == 8, 1, 0)

##manipulation check correct:
##Q8A == 2 counts as correct when maj.dem is 1, Q8A == 1 when maj.rep is 1;
##the result is NA when Q8A is NA
saw_dem_majority <- data1$maj.dem == 1
saw_rep_majority <- data1$maj.rep == 1
data1$manip.correct <- ifelse(
  (saw_dem_majority & data1$Q8A == 2) | (saw_rep_majority & data1$Q8A == 1),
  1, 0
)
table(data1$manip.correct) ##1388 correct, 694 incorrect

##index for knowledge (number correct and proportion correct)
##Adds the three *_correct_update columns left to right. Because NA propagates
##through "+", the index is NA for anyone with an NA on 1 or more of the items.
knowledge_items <- c("Q9_correct_update", "Q10_correct_update", "Q11_correct_update")
data1$knowledge_correct <- Reduce(`+`, data1[knowledge_items])
summary(data1$knowledge_correct) #median is 2 correct
##share of the 3 items answered correctly
data1$knowledge_prop_correct <- data1$knowledge_correct / 3
summary(data1$knowledge_prop_correct) ##43 missing

##Do people think the news covers this?
##The shares are computed from the Q8 frequency table itself rather than from
##counts typed in by hand, so they stay correct if the data or the recoding
##above change. Respondents with NA on Q8 are excluded (table() drops NA).
q8_counts <- table(data1$Q8)
q8_counts ##
##the grouping below relies on the table having exactly five categories,
##in the order: nearly all, most, some, few, none
stopifnot(length(q8_counts) == 5)
q8_total <- sum(q8_counts)
sum(q8_counts[1:2]) / q8_total ##13% think nearly all or most
q8_counts[[3]] / q8_total ##28% think some of the time
sum(q8_counts[4:5]) / q8_total ##59% think few or none

##identify possible speeders: under 10 on the vignette timer
##(duration_VIGNETTE, taken to be in seconds)
##NOTE(review): any negative duration_VIGNETTE value also satisfies "< 10" and
##is therefore flagged as a speeder -- confirm that is intended.
under_ten <- data1$duration_VIGNETTE < 10
data1$speeder <- ifelse(under_ten, 1, 0)
table(data1$speeder) ##241

##create indicator for answered main DVs (cong, bill, fair):
##1 when all three are non-missing, 0 when at least one is NA
main_dvs <- c("cong", "bill", "fair")
answered_all_main <- complete.cases(data1[, main_dvs])
data1$answer.main <- ifelse(answered_all_main, 1, 0)
summary(as.factor(data1$answer.main)) ##drop 20 respondents


###################################################
##   Save data for analysis and figures          ##
###################################################
##Write the prepared data frame to a Stata file in the working directory.
##Note that nothing is filtered out here: respondents who didn't answer the
##main 3 DVs are still included.
write.dta(dataframe = data1, file = "Study_2_TESS_for_analysis.dta")

